package org.example

import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}

/**
 * Spark Streaming job that counts words read from a TCP text socket.
 *
 * Each line is split on whitespace; only tokens made up entirely of ASCII
 * letters are kept, they are lower-cased, stop words are dropped, and the
 * remaining words are counted per 5-second batch. The counts of every batch
 * are printed with `DStream.print()`.
 *
 * Usage: `FilteredWordCount [host] [port]` (defaults: `localhost`, `9999`).
 */
object FilteredWordCount {

  /** Master URL applied only when no `spark.master` has been configured. */
  private val DefaultMaster = "local[2]"

  /** Socket host used when no first argument is given. */
  private val DefaultHost = "localhost"

  /** Socket port used when no second argument is given. */
  private val DefaultPort = 9999

  /**
   * Parses a TCP port number from a command-line argument.
   *
   * @param raw the argument text
   * @return the port as an integer in the range 1..65535
   * @throws IllegalArgumentException if `raw` is not an integer in that range
   */
  private def parsePort(raw: String): Int =
    scala.util.Try(raw.trim.toInt).toOption
      .filter(p => p >= 1 && p <= 65535)
      .getOrElse(
        throw new IllegalArgumentException(
          s"Invalid port '$raw': expected an integer between 1 and 65535"
        )
      )

  /**
   * Entry point: builds the streaming pipeline, starts it, and blocks until
   * the streaming context terminates.
   *
   * @param args optional `[host] [port]` of the text socket to read from;
   *             missing values fall back to `localhost` and `9999`
   */
  def main(args: Array[String]): Unit = {
    val host = args.headOption.getOrElse(DefaultHost)
    val port = args.drop(1).headOption.map(parsePort).getOrElse(DefaultPort)

    // Values set directly on SparkConf take precedence over spark-submit
    // flags, so the master is only defaulted here, never forced. That keeps
    // `--master` usable while a bare local run still gets local[2].
    val conf = new SparkConf()
      .setAppName("FilteredWordCount")
      .setIfMissing("spark.master", DefaultMaster)
    val ssc = new StreamingContext(conf, Seconds(5))
    val lines = ssc.socketTextStream(host, port)

    // Words that are excluded from the counts.
    val stopWords = Set("a", "an", "the", "this", "that")

    val words = lines
      .flatMap(_.split("\\s+"))
      .filter(_.matches("[a-zA-Z]+")) // keep purely alphabetic tokens only
      // Locale.ROOT makes lower-casing independent of the JVM default locale
      // (e.g. under a Turkish locale "THIS" would become "thıs" and slip
      // past the stop-word filter).
      .map(_.toLowerCase(java.util.Locale.ROOT))
      .filter(word => !stopWords.contains(word)) // drop stop words

    val wordCounts = words.map(word => (word, 1)).reduceByKey(_ + _)
    wordCounts.print()

    ssc.start()
    ssc.awaitTermination()
  }

}
